#########################
### Results for Justice as Checks and Balances
### Using the LBSF
##########################

### In this script the source dataset is the LBSF data
##  This code creates:
## Perpetrator Analysis
## Online appendix analysis for LBSF

# Preliminaries --
rm(list=ls())
gc()

### Libraries
###########################
###--------------------------
# Relevant libraries
# Next, we download packages.
pkgs <- c("foreign", "dplyr", "ggplot2", "gridExtra", "multiwayvcov", "lmtest", "tidytext", 
          "stringr", "tm", "quanteda", 
          "RJSONIO", "RCurl")
for (pkg in pkgs) {
  if (! (pkg %in% rownames(installed.packages()))) { install.packages(pkg) }
  lapply(pkg, library, character.only=T )
}

#### Read LBSF 
## Panel at province-decade level  with population and favorable ruling 
#lbsf <- read.csv("C://Users/franc/Dropbox/PhD/Edgar_Tlaxcala/AGN/lbsf.csv")
#gerhard_key <- read.csv("C://Users/franc/Dropbox/Aztec/Gerhard/gerhard_key.csv")
#lbsf <- lbsf %>% left_join(gerhard_key, by="NewID")
# lbsf <- lbsf %>% mutate(year=as.numeric(year))
# lbsf <- lbsf %>% select("X","year","month1" , "Day","name", "SUBDELEGACION", "PUEBLO", "Compliant","Defendant" , "Compliant_Indios"  , 
#                         "Compliant_Indios_indiv" , "Defendant_LE" ,"Defendant_Buro" , 
#                         "Defendant_Church" , "Defendant_Indian_Gob" ,"Defendant_Indians", "Defendant_Other" , "protect" ,
#                         "tierras", "abusos", "impuestos", "comunidad", "Complaint_cat", "Defendant_cat", "Defendant1.1" ,"Text_clean" )
# 
# write.csv(lbsf, "C://Users/franc/Dropbox/PhD/Edgar_Tlaxcala/AGN/lbsf_2021.csv")
lbsf <- read.csv("C://Users/franc/Dropbox/PhD/Edgar_Tlaxcala/AGN/lbsf_2021.csv")

#################
# Proportions
################
table(lbsf$protect)

### LE
table(lbsf$Defendant_LE)
table(lbsf$Defendant_LE, lbsf$protect)/ table(lbsf$Defendant_LE)[2]

### Buro
table(lbsf$Defendant_Buro)
table(lbsf$Defendant_Buro, lbsf$protect)/ table(lbsf$Defendant_Buro)[2]

### Church
table(lbsf$Defendant_Church)
table(lbsf$Defendant_Church, lbsf$protect)/ table(lbsf$Defendant_Church)[2]


### Indian Gob
table(lbsf$Defendant_Indian_Gob)
table(lbsf$Defendant_Indian_Gob, lbsf$protect)/ table(lbsf$Defendant_Indian_Gob)[2]


#######################
##  
##   Perpetrator Analysis 
##
#######################

#### ALL topics 
mod_all <- lm(protect~Defendant_LE+ Defendant_Buro+Defendant_Church+Defendant_Indian_Gob+as.numeric(year) , 
              data=lbsf[which(lbsf$name!="Mexico")  ,])
mod_all_se <- cluster.vcov(mod_all   , ~ lbsf[which(lbsf$name!="Mexico") ,]$name)
mod_all_se <- coeftest(mod_all  , mod_all_se )


#summary(mod_all ) # 1285 dfs
## FE
mod_all_fe <- lm(protect~Defendant_LE+ Defendant_Buro+Defendant_Church+Defendant_Indian_Gob+as.numeric(year)*name, 
                 data=lbsf[which(lbsf$name!="Mexico")  ,])
mod_all_se_fe <- cluster.vcov(mod_all_fe   , ~ lbsf[which(lbsf$name!="Mexico") ,]$name)
mod_all_se_fe <- coeftest(mod_all_fe , mod_all_se_fe )

#summary(mod_all_fe) # dfs 1072 
#### Tierras
mod_land <- lm(protect~Defendant_LE+ Defendant_Buro+Defendant_Church+Defendant_Indian_Gob +as.numeric(year), 
               data=lbsf[ which(lbsf$name!="Mexico" & lbsf$tierras==1
               ) ,])
mod_land_se <- cluster.vcov(mod_land   , ~ lbsf[ which(lbsf$name!="Mexico"& lbsf$tierras==1) ,]$name)
mod_land_se <- coeftest(mod_land  , mod_land_se)

### FE 
mod_land_fe <- lm(protect~Defendant_LE+ Defendant_Buro+Defendant_Church+Defendant_Indian_Gob +as.numeric(year)*name, 
                  data=lbsf[ which(lbsf$name!="Mexico" & lbsf$tierras==1
                  ) ,])
mod_land_se_fe <- cluster.vcov(mod_land_fe   , ~ lbsf[ which(lbsf$name!="Mexico"& lbsf$tierras==1
) ,]$name)
mod_land_se_fe <- coeftest(mod_land_fe  , mod_land_se_fe)



#### Mistreatment
mod_abuse <- lm(protect~Defendant_LE+ Defendant_Buro+Defendant_Church+Defendant_Indian_Gob +as.numeric(year) , 
                data=lbsf[ which(lbsf$name!="Mexico" & lbsf$abusos==1
                ) ,])
mod_abuse_se <- cluster.vcov(mod_abuse  , ~ lbsf[ which(lbsf$name!="Mexico" & lbsf$abusos==1
) ,]$name)
mod_abuse_se <- coeftest(mod_abuse  , mod_abuse_se)
### FE
mod_abuse_fe <- lm(protect~Defendant_LE+ Defendant_Buro+Defendant_Church+Defendant_Indian_Gob +as.numeric(year)*name , 
                   data=lbsf[ which(lbsf$name!="Mexico" & lbsf$abusos==1
                   ) ,])
mod_abuse_se_fe <- cluster.vcov(mod_abuse_fe  , ~ lbsf[ which(lbsf$name!="Mexico" & lbsf$abusos==1
) ,]$name)
mod_abuse_se_fe <- coeftest(mod_abuse_fe  , mod_abuse_se_fe)



#### Impuestos
mod_tax <- lm(protect~Defendant_LE+ Defendant_Buro+Defendant_Church+Defendant_Indian_Gob +as.numeric(year), 
              data=lbsf[ which(lbsf$name!="Mexico" & lbsf$impuestos==1
              ) ,])
mod_tax_se <- cluster.vcov(mod_tax  , ~ lbsf[ which(lbsf$name!="Mexico" & lbsf$impuestos==1
) ,]$name)
mod_tax_se <- coeftest(mod_tax  , mod_tax_se)
## FE
mod_tax_fe <- lm(protect~Defendant_LE+ Defendant_Buro+Defendant_Church+Defendant_Indian_Gob +as.numeric(year)*name, 
                 data=lbsf[ which(lbsf$name!="Mexico" & lbsf$impuestos==1
                 ) ,])
mod_tax_se_fe <- cluster.vcov(mod_tax_fe  , ~ lbsf[ which(lbsf$name!="Mexico" & lbsf$impuestos==1
) ,]$name)
mod_tax_se_fe <- coeftest(mod_tax_fe  , mod_tax_se_fe)


#### Community
mod_community <- lm(protect~Defendant_LE+ Defendant_Buro+Defendant_Church+Defendant_Indian_Gob +as.numeric(year), 
                    data=lbsf[ which(lbsf$name!="Mexico" & lbsf$comunidad==1
                    ),])
mod_community_se <- cluster.vcov(mod_community  , ~ lbsf[ which(lbsf$name!="Mexico" & lbsf$comunidad==1
),]$name)
mod_community_se <- coeftest(mod_community  , mod_community_se)
## FE
mod_community_fe <- lm(protect~Defendant_LE+ Defendant_Buro+Defendant_Church+Defendant_Indian_Gob +as.numeric(year)*name, 
                       data=lbsf[ which(lbsf$name!="Mexico" & lbsf$comunidad==1
                       ),])
mod_community_se_fe <- cluster.vcov(mod_community_fe  , ~ lbsf[ which(lbsf$name!="Mexico" & lbsf$comunidad==1
),]$name)
mod_community_se_fe <- coeftest(mod_community_fe  , mod_community_se_fe)

###### ------------------------------------------------------------------
#   Figure 3: Estimates of favorable ruling by perpertators and type of claims
###### ------------------------------------------------------------------

#### All cases
all_est <- as.data.frame(mod_all_se[2:5,1:2])
rownames(all_est) <- c("Local elites", "Bureaucrats", "Church", "Indian leaders")
colnames(all_est) <- c("coef", "se")
all_est$ID <- c(1:4)
all_plot <- ggplot(all_est ) + geom_pointrange(aes(x=reorder(rownames(all_est),-ID), y=coef, ymin=coef-1.96*se, ymax=coef+1.96*se))
all_plot <- all_plot + theme_bw() + geom_hline(yintercept = 0, linetype="dashed") + coord_flip()
all_plot <- all_plot + xlab("") + ylab("") + theme(axis.text=element_text(size=10),
                                                                          axis.title=element_text(size=12,face="bold"))
all_plot <- all_plot + labs(caption="All Cases") +  theme(plot.caption = element_text(hjust=0.5, size=rel(1), vjust = 5))
all_plot
##### LAND
land_est <- as.data.frame(mod_land_se[2:5,1:2])
rownames(land_est) <- c("Local elites", "Bureaucrats", "Church", "Indian leaders")
colnames(land_est) <- c("coef", "se")
land_est$ID <- c(1:4)
land_plot <- ggplot(land_est ) + geom_pointrange(aes(x=reorder(rownames(land_est),-ID), y=coef, ymin=coef-1.96*se, ymax=coef+1.96*se))
land_plot <- land_plot + theme_bw() + geom_hline(yintercept = 0, linetype="dashed") + coord_flip()
land_plot <- land_plot + xlab("") + ylab("") + theme(axis.text=element_text(size=10),
                               axis.title=element_text(size=12,face="bold"))
land_plot <- land_plot + labs(caption="Land") +  theme(plot.caption = element_text(hjust=0.5, size=rel(1), vjust = 5))

##### Abuses
abuse_est <- as.data.frame(mod_abuse_se[2:5,1:2])
rownames(abuse_est) <- c("Local elites", "Bureaucrats", "Church", "Indian leaders")
colnames(abuse_est) <- c("coef", "se")
abuse_est$ID <- c(1:4)
abuse_plot <- ggplot(abuse_est ) + geom_pointrange(aes(x=reorder(rownames(abuse_est),-ID), y=coef, ymin=coef-1.96*se, ymax=coef+1.96*se))
abuse_plot <- abuse_plot + theme_bw() + geom_hline(yintercept = 0, linetype="dashed") + coord_flip()
abuse_plot <- abuse_plot + xlab("") + ylab("") + theme(axis.text=element_text(size=10),
                                                                                   axis.title=element_text(size=12,face="bold"))
abuse_plot <- abuse_plot + labs(caption="Mistreatment") +  theme(plot.caption = element_text(hjust=0.5, size=rel(1), vjust = 5))

##### Taxes
tax_est <- as.data.frame(mod_tax_se[2:5,1:2])
rownames(tax_est ) <- c("Local elites", "Bureaucrats", "Church", "Indian leaders")
colnames(tax_est) <- c("coef", "se")
tax_est$ID <- c(1:4)
tax_plot <- ggplot(tax_est ) + geom_pointrange(aes(x=reorder(rownames(tax_est),-ID), y=coef, ymin=coef-1.96*se, ymax=coef+1.96*se))
tax_plot <- tax_plot + theme_bw() + geom_hline(yintercept = 0, linetype="dashed") + coord_flip()
tax_plot <- tax_plot + xlab("") + ylab("") + theme(axis.text=element_text(size=10),
                                                                      axis.title=element_text(size=12,face="bold"))
tax_plot <- tax_plot + labs(caption="Taxes") +  theme(plot.caption = element_text(hjust=0.5, size=rel(1), vjust = 5))

#### Community
comm_est <- as.data.frame(mod_community_se[2:5,1:2])
rownames(comm_est) <- c("Local elites", "Bureaucrats", "Church", "Indian leaders")
colnames(comm_est) <- c("coef", "se")
comm_est$ID <- c(1:4)
comm_plot <- ggplot(comm_est ) + geom_pointrange(aes(x=reorder(rownames(comm_est),-ID), y=coef, ymin=coef-1.96*se, ymax=coef+1.96*se))
comm_plot <- comm_plot + theme_bw() + geom_hline(yintercept = 0, linetype="dashed") + coord_flip()
comm_plot <- comm_plot + xlab("") + ylab("") + theme(axis.text=element_text(size=10),
                                                                            axis.title=element_text(size=12,face="bold"))
comm_plot <- comm_plot + labs(caption="Community") +  theme(plot.caption = element_text(hjust=0.5, size=rel(1), vjust = 5))
### Grid
setwd("C://Users/franc/Dropbox/PhD/dissertation/paper_colonialism_liberalims/tex/FINAL_version_2021/figures_paper/")
png("figure3.tiff", width = 8, height = 8, units = 'in', res = 8000)
grid.arrange(all_plot, land_plot, abuse_plot, tax_plot, comm_plot,
             ncol = 2)
dev.off()

###--------------------------------------------------------
#
#  Tables A2.3 and A2.4
#
###--------------------------------------------------------

###### Units
all        <- length(unique(lbsf$name[which(lbsf$name!="Mexico")]))
land_unit  <- length(unique(lbsf$name[which(lbsf$name!="Mexico" & lbsf$tierras==1)]))
abuse_unit <- length(unique(lbsf$name[which(lbsf$name!="Mexico" & lbsf$abusos==1)]))
tax_unit   <- length(unique(lbsf$name[which(lbsf$name!="Mexico" & lbsf$impuestos==1)]))
com_unit   <- length(unique(lbsf$name[which(lbsf$name!="Mexico" & lbsf$comunidad==1)]))

unit_line <- c(all, land_unit, abuse_unit, tax_unit, com_unit)
##### Stargazer
#### Without FE  Table A3
stargazer(mod_all, mod_land,mod_abuse, mod_tax, mod_community, 
          se   = list(mod_all_se[, 2], mod_land_se[, 2],mod_abuse_se[,2], mod_tax_se[,2] , mod_community_se[,2] 
          ) ,
          p    = list(mod_all_se[, 4], mod_land_se[, 4], mod_abuse_se[,4],mod_tax_se[,4] , mod_community_se[,4]
          ),
          dep.var.labels= c("Favorable Ruling"),
          covariate.labels =c("Local Elites","Bureaucrats",  "Church", 
                              "Indian elites"
          ),
          column.labels = c("All", "Land", "Mistreatment", "Taxes", "Community"
          ),
          column.separate = c(1,1,1,1),
          object.names = F,
          #model.names = c("", "Decrease", "Increase", "Strong", "Weak", "Strong", "Weak"),
          keep = c("Defendant_L", "Defendant_Buro", "Defendant_Church", "Defendant_Indian_Gob"), 
          keep.stat = c("n", "adj.rsq"),
          add.lines=list(c("Province FE", "N", "N", "N", "N", "N", "N","N"),
                         c("Time LT", "Y", "Y", "Y", "Y","Y", "Y","Y")
          ),
          label="dictionary_vulner_merit",
          title =("Favorable Ruling, Topics, and Actors") 
          #out="model_actors.tex"
          )

#### With Prvince FE and TC   Table A4
stargazer(mod_all_fe, mod_land_fe,mod_abuse_fe, mod_tax_fe, mod_community_fe, 
          se   = list(mod_all_se_fe[, 2], mod_land_se_fe[, 2],mod_abuse_se_fe[,2], mod_tax_se_fe[,2] , mod_community_se_fe[,2] 
          ) ,
          p    = list(mod_all_se_fe[, 4], mod_land_se_fe[, 4], mod_abuse_se_fe[,4],mod_tax_se_fe[,4] , mod_community_se_fe[,4]
          ),
          dep.var.labels= c("Favorable Ruling"),
          covariate.labels =c("Local Elites","Bureaucrats",  "Churchmen", 
                              "Indian elites"
          ),
          column.labels = c("All", "Land", "Mistreatment", "Taxes", "Community"
          ),
          column.separate = c(1,1,1,1),
          object.names = F,
          #model.names = c("", "Decrease", "Increase", "Strong", "Weak", "Strong", "Weak"),
          keep = c("Defendant_L", "Defendant_Buro", "Defendant_Church", "Defendant_Indian_Gob"), 
          keep.stat = c("n", "adj.rsq"),
          add.lines=list(c("Province FE", "Y", "Y", "Y", "Y", "Y"),
                         c("Time LT", "Y", "Y", "Y", "Y","Y"),
                         c("Province time trends", "Y", "Y", "Y", "Y","Y")
                         
          ),
          label="dictionary_vulner_merit",
          title =("Favorable Ruling, Topics, and Actors (FE)") 
          #out="model_actors_fe.tex"
          )



#######################
##  
##   Suplementary Material S3
##
#######################

## Distribution of Complainant and Defendant --------------------------
### Table S3.2
### Complainants
## Totals
table(lbsf$Complaint_cat, useNA = c("ifany"))
## Percentages
table(lbsf$Complaint_cat, useNA = c("ifany"))/ nrow(lbsf)

### Defendants
### Complainants
table(lbsf$Defendant_cat, useNA = c("ifany"))
### Defendants
table(lbsf$Defendant_cat, useNA = c("ifany"))/ nrow(lbsf)


## Table for number of claimsby province  ---------------------------------
### Table S3.3
claims_prov <- lbsf %>% group_by(SUBDELEGACION) %>% summarise(claims=n())
claims_prov <- claims_prov %>% arrange(-claims)
## Provinces with top 10
head(claims_prov,12)

## Monthly plot -----------------------------------------------
# Figure S3.3
claims_2019_month <- lbsf %>% group_by(month1) %>% summarise(claims=n()) %>% filter(!is.na(month1))
claims_2019_month$month <- c("January", "February", "March", "April", "May", "June", 
                             "July", "August", "September", "October", "November", "December")
claims_2019_month$month2 <- factor(claims_2019_month$month, month.name)

clim_plot <- ggplot(claims_2019_month, aes(x=month2, y=claims, group=1)) + geom_line()
clim_plot <- clim_plot + theme_bw() + xlab("") +ylab("Total Claims")
clim_plot <- clim_plot + theme(axis.text.x = element_text(angle = 45, hjust = 1))
clim_plot
#### Save Figure S3.3
# ggsave("C://Users/franc/Dropbox/PhD/dissertation/paper_colonialism_liberalims/tex/2019_version/clim_plot.png",
#        clim_plot, width=7 )


## Number of claims by individual town ------------------------
## Figure S3.4
intensity  <- lbsf %>% group_by(PUEBLO) %>% summarise(times=n()) 
intensity2 <- intensity %>%  group_by(times) %>% summarise(n=n())


intensity_plot <- ggplot(intensity2, aes(x=times, y=n)) +geom_bar(stat="identity") + xlim(0,32)
intensity_plot <- intensity_plot + xlab("NUmber of Claims") + ylab("Number of Towns") + theme_bw()
intensity_plot

### Save Figure S3.4
#ggsave("C://Users/franc/Dropbox/PhD/dissertation/paper_colonialism_liberalims/tex/2019_version/online_appendix/intensity.png", intensity_plot, width=6)

###---------------------------
#   Measuring bias of LBSF files
###---------------------------
### Compare LBSF with indios dataset 
### 
lbsf_year <- lbsf %>% group_by(year) %>% summarize(total=n()) %>% filter(!is.na(year))
years_v   <- lbsf_year$year
lbsf_year$per <- lbsf_year$total/sum(lbsf_year$total )

## keep only the years that appear in LBSF
agn_indios  <- agn_indios <- read.csv("C://Users/franc/Dropbox/PhD/Edgar_Tlaxcala/AGN/agn_indios_2021.csv")
indios_year <- agn_indios %>% filter(year>=1589 & year <=1688) %>% group_by(year) %>% summarize(total=n()) 
indios_year <- indios_year[indios_year$year %in% years_v,]
indios_year$per <- indios_year$total/sum(indios_year$total)

# Figure S3.5
l <- ggplot(lbsf_year, aes(x=year, per))  + geom_line() + theme_bw() + xlab("") + ylab("Percentage") + ggtitle("LBSF sample")
i <- ggplot(indios_year, aes(x=year, per))  + geom_line() + theme_bw() + xlab("") + ylab("Percentage") +ggtitle("Full sample")
l; i
#ggsave(file= "C:/Users/franc/Dropbox/PhD/dissertation/paper_colonialism_liberalims/tex/FINAL_version_2021/online_appendix/lbsf_bias.png",
#       grid.arrange(l,i, nrow = 2), width=7)

### Statistical test (in supplementary materials' text)
wilcox.test(lbsf_year$per,indios_year$per )

######### Differences in topics and protection 
## Table S3.4
#land
l_lbsf <- table(lbsf$tierras)/sum(table(lbsf$tierras))
l_ind  <- table(agn_indios$tierras_d)/sum(table(agn_indios$tierras_d))
#mistreatment
m_lbsf <- table(lbsf$abusos)/sum(table(lbsf$abusos))
m_ind  <-table(agn_indios$abusos_d)/sum(table(agn_indios$abusos_d))
# taxes
t_lbsf <- table(lbsf$impuestos)/sum(table(lbsf$impuestos))
t_ind  <-table(agn_indios$impuestos_d)/sum(table(agn_indios$impuestos_d))
#comunidad
c_lbsf <- table(lbsf$comunidad)/sum(table(lbsf$comunidad))
c_ind<- table(agn_indios$comunidad_d)/sum(table(agn_indios$comunidad_d))
#### Protection
p_lbsf <- table(lbsf$protect)/sum(table(lbsf$protect))
p_ind  <- table(agn_indios$protect_new2)/sum(table(agn_indios$protect_new2))

### Table
lbsf_per <- c(l_lbsf[2], m_lbsf[2] , t_lbsf[2], c_lbsf[2] ,p_lbsf[2] )
ind_per <- c(l_ind[2], m_ind[2] , t_ind[2], c_ind[2] ,p_ind[2] )

table <- cbind(lbsf_per, ind_per)
rownames(table) <- c("Land", "Mistreatment", "Taxes", "Community", "Protection")
colnames(table) <- c("LBSF sample", "Full sample")
xtable(table)

#----------------------------------------------
# Differentiation between LE and Bureaucrats 
# Table S3.5  
lbsf <- lbsf %>% mutate(Defendant1.1 =as.factor(Defendant1.1),
                        Defendant1.1=relevel(Defendant1.1, ref="LE"))

table( lbsf$tierras,lbsf$Defendant1.1 )
land_p    <- table(lbsf$Defendant1.1, lbsf$tierras)[,2]/(table(lbsf$Defendant1.1))
abusos_p  <- table(lbsf$Defendant1.1, lbsf$abusos)[,2]/(table(lbsf$Defendant1.1))
tax_p     <- table(lbsf$Defendant1.1, lbsf$impuestos)[,2]/(table(lbsf$Defendant1.1))
com_p     <- table(lbsf$Defendant1.1, lbsf$comunidad)[,2]/(table(lbsf$Defendant1.1))

table_p <- rbind(land_p, abusos_p, tax_p, com_p)
other   <- 1-colSums(table_p)
table_p <- rbind(table_p, other)
colnames(table_p) <- c("LE", "Bureaucrats", "Church members", "Indian elites")
rownames(table_p) <- c("Land", "Mistreatment", "Taxes", "Community", "Other/NA")
xtable(table_p*100)

#----------------------------------------------
###### Defendant as DV
### Table S3.6 
model1 <- lm(tierras~Defendant1.1 ,  data=lbsf[ which(lbsf$name!="Mexico"),])
model1_vcov <- cluster.vcov(model1  , ~ lbsf[ which(lbsf$name!="Mexico"),]$name)
model1_vcov <-  coeftest(model1 , model1_vcov)

model2      <- lm(abusos~Defendant1.1,  data=lbsf[ which(lbsf$name!="Mexico"),])
model2_vcov <- cluster.vcov(model2  , ~ lbsf[ which(lbsf$name!="Mexico"),]$name)
model2_vcov <-  coeftest(model2 , model2_vcov)

model3      <- lm(impuestos~Defendant1.1,  data=lbsf[ which(lbsf$name!="Mexico"),])
model3_vcov <- cluster.vcov(model3  , ~ lbsf[ which(lbsf$name!="Mexico"),]$name)
model3_vcov <-  coeftest(model3, model3_vcov)

model4      <- lm(comunidad~Defendant1.1,  data=lbsf[ which(lbsf$name!="Mexico"),])
model4_vcov <- cluster.vcov(model4  , ~ lbsf[ which(lbsf$name!="Mexico"),]$name)
model4_vcov <-  coeftest(model4, model4_vcov)

stargazer(model1, model2, model3, model4, 
          se   = list( model1_vcov[, 2],model2_vcov[,2], model3_vcov[,2] , model4_vcov[,2] 
          ) ,
          p    = list( model1_vcov[, 4], model2_vcov[,4],model3_vcov[,4] , model4_vcov[,4]
          ),
          dep.var.labels= c(""),
          column.labels   = c("Land", "Mistreatment", "Taxes", "Community"),
          column.separate = c(1,1,1,1),
          covariate.labels =c("Bureaucrats", "Churchmen", "Indian elites"),
          #column.labels = c("Local Elites", "Bureaucrats", "Church members", "Indian elites"
          #),
          #column.separate = c(1,1,1,1),
          object.names = F,
          #model.names = c("", "Decrease", "Increase", "Strong", "Weak", "Strong", "Weak"),
          keep = c("Defendant1.1Buro", "Defendant1.1Church", "Defendant1.1Indian"),
          keep.stat = c("n", "adj.rsq"),
          label="perpetrators",
          title =("Perpetators and topics of claims") 
          #out="model_perpetrators.tex"
          )



#--------------------------------------------------
##### Most discriminant words
### Figure 3.6
### remove spanish stop words
### download json file
# grab the data
#raw_data <- getURL("https://countwordsfree.com/stopwords/spanish/json")
# Then covert from JSON into a list in R
#data <- fromJSON(raw_data)
stop_spanish <- stopwords("es")
extra_words <- c("naturales", "natural", "indio", "indios",  "juan", "san", "santa", "mas", "asi", 
                 "ano", "año", "dicen", "personas", "ahora", "dice", "mayor", "dar"
                 )## extra words to be removed
stop_spanish <- c(stop_spanish, extra_words)
# We can coerce this to a data.frame
stop_spanish<- as.data.frame(as.matrix(stop_spanish ))
colnames(stop_spanish) <- "word"

####  Top words for LE
texts_le <- lbsf %>% filter(Defendant1.1=="LE") %>%  dplyr::select(Text_clean)
## Clean
texts_le <- texts_le %>% mutate(text_clean=tolower(Text_clean), 
                                text_clean=gsub("[^0-9A-Za-z///' ]","" , text_clean ,ignore.case = TRUE)
                                ) %>% dplyr::select(text_clean)

texts_le <- texts_le %>% unnest_tokens(output = word, input = text_clean) 


### antijoin
texts_le <- texts_le %>% anti_join(stop_spanish)

text_wordcounts_le <- texts_le  %>% count(word, sort = TRUE)


###plot
le <- ggplot(text_wordcounts_le[1:15,], aes(x=reorder(word, +n), y=n)) + geom_bar(stat="identity", fill="darkred") + coord_flip()
le <- le  + theme_bw() + xlab("Word") + ylab("Count") 
le <- le + geom_text(aes(label=n), hjust=1.2, color="white", fontface="bold")
le <- le + ggtitle("Local Elites")
le

#### Top words for Buro 
texts_buro <- lbsf %>% filter(Defendant1.1=="Buro") %>%  dplyr::select(Text_clean)
## Clean
texts_buro <- texts_buro %>% mutate(text_clean=tolower(Text_clean), 
                                text_clean=gsub("[^0-9A-Za-z///' ]","" , text_clean ,ignore.case = TRUE)
) %>% dplyr::select(text_clean)

texts_buro <- texts_buro %>% unnest_tokens(output = word, input = text_clean)    
### antijoin
texts_buro <- texts_buro %>% anti_join(stop_spanish)

text_wordcounts_buro <- texts_buro  %>% count(word, sort = TRUE)

###plot
buro <- ggplot(text_wordcounts_buro[1:15,], aes(x=reorder(word, +n), y=n)) + geom_bar(stat="identity", fill="darkred") + coord_flip()
buro <- buro  + theme_bw() + xlab("") + ylab("Count") 
buro <- buro + geom_text(aes(label=n), hjust=1.2, color="white", fontface="bold")
buro <- buro + ggtitle("Bureaucrats")
buro 


###### Top words for Indian LE
texts_indian <- lbsf %>% filter(Defendant1.1=="Indian") %>%  dplyr::select(Text_clean)
## Clean
texts_indian <- texts_indian %>% mutate(text_clean=tolower(Text_clean), 
                                    text_clean=gsub("[^0-9A-Za-z///' ]","" , text_clean ,ignore.case = TRUE)
) %>% dplyr::select(text_clean)

texts_indian <- texts_indian %>% unnest_tokens(output = word, input = text_clean)    
### antijoin
texts_indian <- texts_indian %>% anti_join(stop_spanish)

text_wordcounts_indian <- texts_indian  %>% count(word, sort = TRUE)

###plot
indian <- ggplot(text_wordcounts_indian[1:15,], aes(x=reorder(word, +n), y=n)) + geom_bar(stat="identity", fill="darkred") + coord_flip()
indian <- indian  + theme_bw() + xlab("Word") + ylab("Count") 
indian <- indian + geom_text(aes(label=n), hjust=1.2, color="white", fontface="bold")
indian <- indian + ggtitle("Indian Elites")
indian

##### Top words for Church 
texts_church<- lbsf %>% filter(Defendant1.1=="Church") %>%  dplyr::select(Text_clean)
## Clean
texts_church<- texts_church%>% mutate(text_clean=tolower(Text_clean), 
                                        text_clean=gsub("[^0-9A-Za-z///' ]","" , text_clean ,ignore.case = TRUE)
) %>% dplyr::select(text_clean)

texts_church<- texts_church%>% unnest_tokens(output = word, input = text_clean)    
### antijoin
texts_church<- texts_church%>% anti_join(stop_spanish)

text_wordcounts_church<- texts_church %>% count(word, sort = TRUE)

###plot
church <- ggplot(text_wordcounts_church[1:15,], aes(x=reorder(word, +n), y=n)) + geom_bar(stat="identity", fill="darkred") + coord_flip()
church <- church  + theme_bw() + xlab("") + ylab("Count") 
church <- church + geom_text(aes(label=n), hjust=1.2, color="white", fontface="bold")
church <- church + ggtitle("Church")
church
##### Grid
#### Save grid 
# ggsave(file= "C:/Users/franc/Dropbox/PhD/dissertation/paper_colonialism_liberalims/tex/2019_version/online_appendix/texts.png",
#        grid.arrange(le,buro,church, indian, nrow = 2), width=7)
# 



